--
-- lookup.bs
--
-- implements the lookup hardware from the paper
-- "A Novel IP_Routing Lookup Scheme and Hardware Architecture
-- for Multigigabit Switching Routers" by Nen-Fu Huang and Shi-Ming Zhao.
--

package LookupU (sysLookupU, Lookup, Address, Entry) where

import FIFOF

sysLookupU :: Module Lookup
sysLookupU = mkLookup

-----------
-- TYPES --
-----------

type Address = Bit 32
type Entry = Bit 32

data Step = Start | Step1 | Step2 | Step3
          deriving (Eq, Bits)


---------------
-- FUNCTIONS --
---------------

-- retrieve the high bits
bh :: Address -> Bit 16
bh x = x[31:16]

-- retrieve the low bits
bl :: Address -> Bit 16
bl x = x[15:0]

-- retrieve the pointer/next hop field of a lookup entry
pntr :: Entry -> Bit 28
pntr x = x[31:4]

-- retrieve the offset field of a lookup entry
offset :: Entry -> Bit 4
offset x = x[3:0]

-- retrieve the 8 bit next hop from a lookup entry
nextHop :: Address -> Bit 8
nextHop x = x[11:4]

-- compute the location of the codeword
cwloc :: Address -> Entry -> Address
cwloc addr entry =
    let
          pointer :: Bit 32
          pointer = zeroExtend (pntr entry)
          q :: Bit 16
          q = selectBits16 (offset entry) (bl addr)
          s :: Bit 32
          s =  0::(Bit 20) ++ q[15:4]  -- q DIV 16
    in
          pointer + s

countBits16 :: Bit 16 -> Nat
countBits16 bs = countBits 0 16 bs

-- count the number of set bits
countBits :: Integer -> Integer -> Bit n -> Nat
countBits l h bs =
    if h == l+1 then
        zeroExtend bs[fromInteger l : fromInteger l]
    else
        let m = (h+l) `div` 2
        in  (countBits l m bs) + (countBits m h bs)

-- compute the value of q given the second half of the IP address
-- and the offset length.
selectBits16 :: Bit 4 -> Bit 16 -> Bit 16
selectBits16 n bs = selectBits 0 15 n bs

selectBits :: Integer -> Integer -> Bit n -> Bit m -> Bit m
selectBits l h n bs =
    if l <= h then
        if n == fromInteger (h-l) then
            bs[fromInteger h : fromInteger l]
        else
            selectBits (l+1) h n bs
    else
        0

-- compute the index for the next request from the first and second lookup
compCNHAIndex :: Address -> Entry -> Entry -> Bit 32
compCNHAIndex addr e1 e2 =
    let
          q :: Bit 16
          q = selectBits16 (offset e1) (bl addr)
          w :: Bit 4
          w = q[3:0]  -- q MOD 16
          wbars :: Bit 32
          wbars = countBits16 (selectBits16 w (bh e2))
          t :: Bit 32
          t = (zeroExtend (bl e2)) + wbars - 1
    in
          t

-- compute the actual next request
compCNHALoc :: Address -> Entry -> Bit 32 -> Address
compCNHALoc addr e1 index =
    let
          pointer :: Bit 32
          pointer = zeroExtend (pntr e1)
          -- actually, it's the length - 1
          offsetlen :: Nat
          offsetlen = zeroExtend (offset e1)
    in
          -- the paper says to use 2^(offsetlen-3)*4  -- why times 4?
          pointer + (1 << (offsetlen - 3)) + (index >> 2)
          --  = pntr + (2^(offsetlen-3) + (index DIV 4)

-- pick out one of the four 8 bit entries from a 32 bit value
selectEntry :: Bit 2 -> Entry -> Bit 8
selectEntry index entries = (entries << (((zeroExtend index) :: Bit 32) << 3))[31:24]


----------------
-- INTERFACES --
----------------

interface Lookup =
  dar   :: FIFOF Address                -- incoming lookup requests
  drr   :: FIFOF (Bit 8)                -- output for results
  sri   :: FIFOF Address                -- queue of requests to the SRAM
  sro   :: FIFOF Entry                  -- queue of responses from the SRAM

mkLookup :: Module Lookup
mkLookup =
  module

    darS :: FIFOF Address                -- incoming lookup requests
    darS <- mkFIFOF
    drrS :: FIFOF (Bit 8)                -- output for results
    drrS <- mkFIFOF
    sriS :: FIFOF Address                -- queue of requests to the SRAM
    sriS <- mkFIFOF
    sroS :: FIFOF Entry                  -- queue of responses from the SRAM
    sroS <- mkFIFOF

    -- internal storage

    state :: Reg Step
    state <- mkReg Start

    -- stores the result of the first lookup (pntr & offset)
    entry1 :: Reg Entry
    entry1 <- mkRegU

    index :: Reg (Bit 2)
    index <- mkRegU

    addRules $ mkLookupRules darS drrS sriS sroS state entry1 index

    interface
      dar = darS
      drr = drrS
      sri = sriS
      sro = sroS


-----------
-- RULES --
-----------

mkLookupRules :: FIFOF Address -> FIFOF (Bit 8) -> FIFOF Address ->
                 FIFOF Entry -> Reg Step -> Reg Entry -> Reg (Bit 2) -> Rules
mkLookupRules dar drr sri sro state entry1 index =
  rules
    -- When we get a request, start the lookup process
    when state == Start, dar.notEmpty, sri.notFull
      ==> action
                sri.enq (zeroExtend (bh dar.first))
                state := Step1

    -- the result of a lookup is a 28 bit pointer/next hop and 4 bits
    -- for the offset length - 1.
    -- if the pointer/next hop field of the first lookup is <= 255,
    -- then it is a next hop (not a pointer), so return it.
    when state == Step1, pntr sro.first <= 255,
         dar.notEmpty, drr.notFull, sro.notEmpty
      ==> action
                drr.enq (zeroExtend (nextHop sro.first))
                dar.deq
                sro.deq
                state := Start

    -- if the pointer/next hop field of the first lookup is > 255,
    -- then it is a pointer to where to find the next hop, so store
    -- the info and make the next request.
    -- the result of the first lookup was a 28 bit pointer, and 4 bits
    -- indicating the offset length - 1.  The offset (the next bits of
    -- the IP address) we will call q.  The pointer indicates a NHA
    -- (next hop array) of size 2^offsetlen.  This array is actually
    -- encoded as 32 bit codewords.  We want to lookup the s-th codeword,
    -- where s = q DIV 16.
    when state == Step1, pntr sro.first > 255,
         dar.notEmpty, sri.notFull, sro.notEmpty
      ==> action
                entry1 := sro.first
                sro.deq
                sri.enq (cwloc dar.first sro.first)
                state := Step2

    -- if the offset of the first lookup is < 3, then the second lookup
    -- returns the answer (the next hop).
    when state == Step2, sro.notEmpty, offset entry1 < 3,
          dar.notEmpty, drr.notFull
      ==> action
                drr.enq sro.first[7:0]
                sro.deq
                state := Start

    -- if the offset of the first lookup is 3 or greater, then:
    -- the result of the second lookup is a 32 bit codeword consisting of
    -- a 16 bit map and a 16 bit base.  The bit in the map corresponding
    -- to the offset q is w = q MOD 16, and let |w| be the number of
    -- accumulated ones from the 0th bit to the wth bit.  The output port
    -- that we want to return is then the t-th entry in the CHNA table,
    -- which follows the codeword array, and where t = base + |w| - 1.
    -- so make one last request to the SRAM.
    when state == Step2, sro.notEmpty, offset entry1 >= 3,
         dar.notEmpty, sri.notFull
      ==> let
              t :: Bit 32
              t = compCNHAIndex dar.first entry1 sro.first
          in  action
                sri.enq (compCNHALoc dar.first entry1 t)
                sro.deq
                dar.deq  -- do this here or in Step3 ?
                index := t[1:0]  -- index = t MOD 4
                state := Step3

    -- when the final result comes from the SRAM, it's the answer,
    -- so return it.
    when state == Step3, sro.notEmpty, drr.notFull
      ==> action
                drr.enq (selectEntry index sro.first)
                sro.deq
                state := Start
